pip install pmdarima
pip install folium
pip install plotly
import numpy as np
import pandas as pd
import datetime
import plotly.express as px
import plotly.express as ex
import folium
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.arima_model import ARIMA
from pmdarima import auto_arima
from fbprophet import Prophet
from fbprophet.plot import plot_plotly, add_changepoints_to_plot
import plotly.graph_objects as go
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")
# Load the raw case records from the Excel workbook (path is relative to the
# current working directory).
covid_data = pd.read_excel('./pak_data-v2.xlsx')
# Preview the first rows and the summary statistics; these bare expressions
# only produce visible output when run as notebook cells.
covid_data.head()
covid_data.describe()
Checking Null Values
# Count the missing values in every column.
covid_data.isnull().sum()
Checking Data Types of Columns
# Show the dtype of every column, then report the first and last date
# present in the data set.
covid_data.dtypes
date_column = covid_data['Date']
min_date = date_column.min()
max_date = date_column.max()
date_range_message = 'First Date of Case in Pakistan : {0} \nLast Date of Case as of Now : {1} \n'
print(date_range_message.format(min_date, max_date))
Make Copy of Original Data Set
# Work on a copy so the frame loaded from disk stays untouched.
pak_data = covid_data.copy()
len(pak_data)
# Add the running-total columns, initialised to zero.
for running_total_column in ('Total Confirmed Cases', 'Total Recovered',
                             'Total Deaths', 'Active Cases'):
    pak_data[running_total_column] = 0
pak_data.head()
Computation of Total Confirmed, Recovered, Deaths and Active Cases
# Running totals over the rows of pak_data, in row order.
#
# Each total is the cumulative sum of the matching per-row column. This
# replaces a row-by-row loop that wrote through chained indexing
# (pak_data[col].iloc[i] = ...), which pandas warns about and which has no
# effect when copy-on-write is enabled.
pak_data['Total Confirmed Cases'] = pak_data['New Cases'].cumsum()
pak_data['Total Recovered'] = pak_data['Cured Cases'].cumsum()
pak_data['Total Deaths'] = pak_data['Death Cases'].cumsum()
# Active cases grow with new cases and shrink with deaths and recoveries.
pak_data['Active Cases'] = (
    pak_data['New Cases'] - pak_data['Death Cases'] - pak_data['Cured Cases']
).cumsum()
pak_data.head(50)
How many cases of each type currently exist in Pakistan?
# Overall totals across every row of the data set.
confirmed = pak_data['New Cases'].sum()
recovered = pak_data['Cured Cases'].sum()
deaths = pak_data['Death Cases'].sum()
# Active = confirmed minus everyone who has recovered or died. Both groups
# must be subtracted; "confirmed - (recovered - deaths)" would add the
# deaths back in.
active = confirmed - recovered - deaths
print('Total Confirmed Cases : {0} \nTotal Recovered Cases : {1} \nTotal Deaths Cases : {2} \nTotal Active Cases : {3}' .format(confirmed, recovered, deaths, active))
# Donut chart with the share of active, recovered and death cases.
labels = ['Active Cases', 'Recovered Cases', 'Death Cases']
sizes = [active, recovered, deaths]
color = ['Orange', 'Green', 'Red']
# Pull every wedge outwards by the same offset.
explode = [0.1 for _ in labels]

plt.figure(figsize=(10, 6))
plt.pie(
    sizes,
    labels=labels,
    autopct='%3.1f%%',
    startangle=9,
    explode=explode,
    colors=color,
)
# A white disc drawn over the centre turns the pie into a donut.
centre_circle = plt.Circle((0, 0), 0.60, fc='white')
fig = plt.gcf()
fig.gca().add_artist(centre_circle)
plt.title('Pakistan COVID-19 Cases', fontsize=24)
plt.axis('equal')
plt.tight_layout()
# Per-date sums of new, cured and death cases.
# The columns are selected with a list: indexing a GroupBy with a bare tuple
# of names is no longer supported by current pandas.
temp = pak_data.groupby(['Date'])[['New Cases', 'Cured Cases', 'Death Cases']].sum().reset_index()
temp

# One bubble chart per metric; marker colour and size both follow the metric.
for daily_metric, chart_title in (
    ("New Cases", 'Trend of Daily Cases in Pakistan'),
    ("Death Cases", 'Trend of Daily Death Cases in Pakistan'),
):
    fig = px.scatter(temp, x="Date", y=daily_metric, color=daily_metric,
                     size=daily_metric, hover_data=[daily_metric],
                     color_discrete_sequence=ex.colors.cyclical.IceFire)
    fig.update_layout(title_text=chart_title,
                      plot_bgcolor='rgb(275, 270, 273)', width=600, height=600)
    fig.show()
# Line chart of the four cumulative series over time.
# Each entry is (column in pak_data, line colour, legend label).
cumulative_series = [
    ('Total Confirmed Cases', 'blue', 'Confirmed Cases'),
    ('Active Cases', 'purple', 'Active Cases'),
    ('Total Recovered', 'green', 'Recovered'),
    ('Total Deaths', 'red', 'Deaths'),
]
fig = go.Figure()
for series_column, series_color, legend_label in cumulative_series:
    fig.add_trace(go.Scatter(x=pak_data['Date'], y=pak_data[series_column],
                             mode='lines+markers', marker_color=series_color,
                             name=legend_label))
fig.update_layout(title_text='Coronavirus Cases Statistics in Pakistan',
                  plot_bgcolor='rgb(275, 270, 273)', width=600, height=600)
fig.show()
I am using the Python Prophet API for time-series forecasting.
# Forecast the cumulative confirmed cases 15 days ahead with Prophet.
confirmed = pak_data['Total Confirmed Cases'].values.tolist()
# Prophet is fitted on a frame with a 'ds' (date) and a 'y' (value) column.
data = pd.DataFrame({'ds': list(pak_data['Date']), 'y': confirmed})
prop = Prophet()
prop.fit(data)
future = prop.make_future_dataframe(periods=15)
prop_forecast = prop.predict(future)
forecast = prop_forecast[['ds', 'yhat']].tail(15)
print(forecast)

fig = go.Figure()
fig.add_trace(go.Scatter(x=pak_data['Date'], y=pak_data['Total Confirmed Cases'],
                         mode='lines+markers', marker_color='green', name='Actual'))
# Plot the point forecast (yhat), the same series printed above; yhat_upper
# is only the upper edge of the uncertainty interval.
fig.add_trace(go.Scatter(x=prop_forecast['ds'], y=prop_forecast['yhat'],
                         mode='lines+markers', marker_color='Orange', name='Predicted'))
fig.update_layout(title_text='Total Confirmed Cases (Predicted vs Actual)',
                  plot_bgcolor='rgb(275, 270, 273)', width=600, height=600)
fig.show()
# Forecast the cumulative deaths 15 days ahead with Prophet.
deaths = pak_data['Total Deaths'].values.tolist()
# Prophet is fitted on a frame with a 'ds' (date) and a 'y' (value) column.
data = pd.DataFrame({'ds': list(pak_data['Date']), 'y': deaths})
prop = Prophet()
prop.fit(data)
future = prop.make_future_dataframe(periods=15)
prop_forecast = prop.predict(future)
forecast = prop_forecast[['ds', 'yhat']].tail(15)
print(forecast)

fig = go.Figure()
fig.add_trace(go.Scatter(x=pak_data['Date'], y=pak_data['Total Deaths'],
                         mode='lines+markers', marker_color='green', name='Actual'))
# Plot the point forecast (yhat), the same series printed above; yhat_upper
# is only the upper edge of the uncertainty interval.
fig.add_trace(go.Scatter(x=prop_forecast['ds'], y=prop_forecast['yhat'],
                         mode='lines+markers', marker_color='red', name='Predicted'))
fig.update_layout(title_text='Death Cases (Predicted vs Actual) using Prophet',
                  plot_bgcolor='rgb(275, 270, 273)', width=600, height=600)
fig.show()
# Forecast the cumulative recoveries 15 days ahead with Prophet.
recv = pak_data['Total Recovered'].values.tolist()
# Prophet is fitted on a frame with a 'ds' (date) and a 'y' (value) column.
data = pd.DataFrame({'ds': list(pak_data['Date']), 'y': recv})
prop = Prophet()
prop.fit(data)
future = prop.make_future_dataframe(periods=15)
prop_forecast = prop.predict(future)
forecast = prop_forecast[['ds', 'yhat']].tail(15)
print(forecast)

fig = go.Figure()
fig.add_trace(go.Scatter(x=pak_data['Date'], y=pak_data['Total Recovered'],
                         mode='lines+markers', marker_color='green', name='Actual'))
# Plot the point forecast (yhat), the same series printed above; yhat_upper
# is only the upper edge of the uncertainty interval.
fig.add_trace(go.Scatter(x=prop_forecast['ds'], y=prop_forecast['yhat'],
                         mode='lines+markers', marker_color='yellow', name='Predicted'))
fig.update_layout(title_text='Recovered Cases (Predicted vs Actual) using Prophet',
                  plot_bgcolor='rgb(275, 270, 273)', width=600, height=600)
fig.show()
ARIMA is a statistical model for time-series forecasting. We will also use ARIMA to check the accuracy of our predictions and to see which model fits our situation better.
# Forecast the cumulative deaths 15 days ahead with an ARIMA model.
cc = pak_data['Total Deaths'].values
# Let auto_arima choose the (p, d, q) order from the series.
p, d, q = auto_arima(cc).order
print(p, d, q)
model = ARIMA(pak_data['Total Deaths'], order=(p, d, q))
arima = model.fit(disp=True)
forecast = arima.forecast(steps=15)
# The first element of the forecast result is used as the predicted values.
pred = list(forecast[0])
print(pred)

# Build the 15 calendar days that follow the last observed date.
prediction_dates = []
start_date = pak_data['Date'].max()
while len(prediction_dates) < 15:
    start_date = start_date + datetime.timedelta(days=1)
    prediction_dates.append(start_date)

fig = go.Figure()
observed_trace = go.Scatter(x=pak_data['Date'], y=pak_data['Total Deaths'],
                            mode='lines+markers', marker_color='green', name='Actual')
predicted_trace = go.Scatter(x=prediction_dates, y=pred,
                             mode='lines+markers', marker_color='Orange', name='Predicted')
fig.add_trace(observed_trace)
fig.add_trace(predicted_trace)
fig.update_layout(title_text='Total Deaths cases Predicted vs Actual using ARIMA')
fig.update_layout(plot_bgcolor='rgb(275, 270, 273)', width=600, height=600)
fig.show()
# Forecast the cumulative deaths with a non-seasonal SARIMAX model.
# A single horizon drives both the number of predicted values and the number
# of dates on the x axis, so the two always have the same length.
forecast_days = 15
cc = pak_data['Total Deaths'].values
# Let auto_arima choose the (p, d, q) order from the series.
p, d, q = auto_arima(cc).order
print(p, d, q)
model = SARIMAX(cc, order=(p, d, q), seasonal_order=(0, 0, 0, 0), measurement_error=True)
model_fit = model.fit(disp=False)
# predict() includes the end index, so this yields exactly forecast_days
# out-of-sample values starting right after the last observation.
pred = model_fit.predict(len(cc), len(cc) + forecast_days - 1)
print(pred)

# Calendar days that follow the last observed date, one per predicted value.
start_date = pak_data['Date'].max()
prediction_dates = [start_date + datetime.timedelta(days=step)
                    for step in range(1, forecast_days + 1)]

fig = go.Figure()
fig.add_trace(go.Scatter(x=pak_data['Date'], y=pak_data['Total Deaths'],
                         mode='lines+markers', marker_color='green', name='Actual'))
fig.add_trace(go.Scatter(x=prediction_dates, y=pred,
                         mode='lines+markers', marker_color='Orange', name='Predicted'))
fig.update_layout(title_text='Total Deaths Cases Predicted vs Actual using SARIMA',
                  plot_bgcolor='rgb(275, 270, 273)', width=600, height=600)
fig.show()
# Daily sums per (date, province), plus running totals within each province.
# The columns are selected with a list: indexing a GroupBy with a bare tuple
# of names is no longer supported by current pandas.
province_cases = pak_data.groupby(['Date', 'Province'])[['New Cases', 'Cured Cases', 'Death Cases']].sum().reset_index()
province_list = province_cases['Province'].unique()

# Grouped cumulative sums keep the row order, so every row receives the
# running total of its own province up to and including that row. This
# replaces nested loops that wrote through chained indexing on a filtered
# copy and then merged the result back with DataFrame.update.
province_key = province_cases['Province']
province_cases['Total Confirmed Cases'] = province_cases['New Cases'].groupby(province_key).cumsum()
province_cases['Total Recovered'] = province_cases['Cured Cases'].groupby(province_key).cumsum()
province_cases['Total Deaths'] = province_cases['Death Cases'].groupby(province_key).cumsum()
# Active cases grow with new cases and shrink with deaths and recoveries.
province_net_change = province_cases['New Cases'] - province_cases['Death Cases'] - province_cases['Cured Cases']
province_cases['Active Cases'] = province_net_change.groupby(province_key).cumsum()

province_cases = province_cases.astype({"New Cases": int, "Cured Cases": int, "Death Cases": int,
                                        "Total Confirmed Cases": int, "Total Recovered": int,
                                        "Total Deaths": int, "Active Cases": int})
province_cases.dtypes
province_cases.head(50)
# Per-province totals with active cases and death / cure rates per 100 cases.
# The columns are selected with a list: indexing a GroupBy with a bare tuple
# of names is no longer supported by current pandas.
s_cases = pak_data.groupby('Province')[['New Cases', 'Cured Cases', 'Death Cases']].sum().reset_index()
s_cases['Active Cases'] = s_cases['New Cases'] - s_cases['Death Cases'] - s_cases['Cured Cases']
s_cases["Death Rate (per 100)"] = np.round(100 * s_cases["Death Cases"] / s_cases["New Cases"], 2)
s_cases["Cure Rate (per 100)"] = np.round(100 * s_cases["Cured Cases"] / s_cases["New Cases"], 2)

# Styled table, largest case count first: 'New Cases' uses the coolwarm
# palette, every other numeric column uses Blues.
s_cases_styled = s_cases.sort_values('New Cases', ascending=False).fillna(0).style.background_gradient(cmap='coolwarm', subset=["New Cases"])
for blue_column in ["Death Cases", "Cured Cases", "Active Cases",
                    "Death Rate (per 100)", "Cure Rate (per 100)"]:
    s_cases_styled = s_cases_styled.background_gradient(cmap='Blues', subset=[blue_column])
s_cases_styled
province_cases.loc[province_cases['Province'] == 'Sindh']
# For Sindh: forecast the cumulative confirmed cases 30 days ahead with Prophet.
s = province_cases.loc[province_cases['Province'] == 'Sindh']
confirmed = s['Total Confirmed Cases'].values.tolist()
# Prophet is fitted on a frame with a 'ds' (date) and a 'y' (value) column.
data = pd.DataFrame({'ds': list(s['Date']), 'y': confirmed})
prop = Prophet()
prop.fit(data)
future = prop.make_future_dataframe(periods=30)
prop_forecast = prop.predict(future)
forecast = prop_forecast[['ds', 'yhat']].tail(30)

fig = go.Figure()
fig.add_trace(go.Scatter(x=s['Date'], y=s['Total Confirmed Cases'],
                         mode='lines+markers', marker_color='green', name='Actual'))
# Plot the point forecast (yhat), the series kept in `forecast`; yhat_upper
# is only the upper edge of the uncertainty interval.
fig.add_trace(go.Scatter(x=prop_forecast['ds'], y=prop_forecast['yhat'],
                         mode='lines+markers', marker_color='Orange', name='Predicted'))
fig.update_layout(title_text='Sindh Confirmed Cases (Predicted vs Actual)',
                  plot_bgcolor='rgb(275, 270, 273)', width=600, height=600)
fig.show()
# For Sindh: forecast the cumulative deaths 30 days ahead with Prophet.
# NOTE: the variable is still called `confirmed` although it holds deaths;
# the name is kept because it is a module-level variable.
confirmed = s['Total Deaths'].values.tolist()
# Prophet is fitted on a frame with a 'ds' (date) and a 'y' (value) column.
data = pd.DataFrame({'ds': list(s['Date']), 'y': confirmed})
prop = Prophet()
prop.fit(data)
future = prop.make_future_dataframe(periods=30)
prop_forecast = prop.predict(future)
forecast = prop_forecast[['ds', 'yhat']].tail(30)

fig = go.Figure()
fig.add_trace(go.Scatter(x=s['Date'], y=s['Total Deaths'],
                         mode='lines+markers', marker_color='green', name='Actual'))
# Plot the point forecast (yhat), the series kept in `forecast`; yhat_upper
# is only the upper edge of the uncertainty interval.
fig.add_trace(go.Scatter(x=prop_forecast['ds'], y=prop_forecast['yhat'],
                         mode='lines+markers', marker_color='red', name='Predicted'))
fig.update_layout(title_text='Sindh Death Cases (Predicted vs Actual)',
                  plot_bgcolor='rgb(275, 270, 273)', width=600, height=600)
fig.show()
# For Sindh: forecast the cumulative recoveries 30 days ahead with Prophet.
s = province_cases.loc[province_cases['Province'] == 'Sindh']
# NOTE: the variable is still called `confirmed` although it holds
# recoveries; the name is kept because it is a module-level variable.
confirmed = s['Total Recovered'].values.tolist()
# Prophet is fitted on a frame with a 'ds' (date) and a 'y' (value) column.
data = pd.DataFrame({'ds': list(s['Date']), 'y': confirmed})
prop = Prophet()
prop.fit(data)
future = prop.make_future_dataframe(periods=30)
prop_forecast = prop.predict(future)
forecast = prop_forecast[['ds', 'yhat']].tail(30)

fig = go.Figure()
fig.add_trace(go.Scatter(x=s['Date'], y=s['Total Recovered'],
                         mode='lines+markers', marker_color='green', name='Actual'))
# Plot the point forecast (yhat), the series kept in `forecast`; yhat_upper
# is only the upper edge of the uncertainty interval.
fig.add_trace(go.Scatter(x=prop_forecast['ds'], y=prop_forecast['yhat'],
                         mode='lines+markers', marker_color='yellow', name='Predicted'))
fig.update_layout(title_text='Sindh Recovered Cases (Predicted vs Actual)',
                  plot_bgcolor='rgb(275, 270, 273)', width=600, height=600)
fig.show()
# Daily sums per (date, city), plus running totals within each city.
# The columns are selected with a list: indexing a GroupBy with a bare tuple
# of names is no longer supported by current pandas.
cities = pak_data.groupby(['Date', 'City'])[['New Cases', 'Cured Cases', 'Death Cases']].sum().reset_index()
city_list = cities['City'].unique()

# Grouped cumulative sums keep the row order, so every row receives the
# running total of its own city up to and including that row. This replaces
# nested loops that wrote through chained indexing on a filtered copy and
# then merged the result back with DataFrame.update.
city_key = cities['City']
cities['Total Confirmed Cases'] = cities['New Cases'].groupby(city_key).cumsum()
cities['Total Recovered'] = cities['Cured Cases'].groupby(city_key).cumsum()
cities['Total Deaths'] = cities['Death Cases'].groupby(city_key).cumsum()
# Active cases grow with new cases and shrink with deaths and recoveries.
city_net_change = cities['New Cases'] - cities['Death Cases'] - cities['Cured Cases']
cities['Active Cases'] = city_net_change.groupby(city_key).cumsum()

cities = cities.astype({"New Cases": int, "Cured Cases": int, "Death Cases": int,
                        "Total Confirmed Cases": int, "Total Recovered": int,
                        "Total Deaths": int, "Active Cases": int})
# Per-city totals with active cases and death / cure rates per 100 cases,
# ordered by number of cases (largest first).
# The columns are selected with a list: indexing a GroupBy with a bare tuple
# of names is no longer supported by current pandas.
c_cases = cities.groupby('City')[['New Cases', 'Cured Cases', 'Death Cases']].sum().reset_index()
c_cases = c_cases.sort_values(['New Cases'], ascending=False)
c_cases['Active Cases'] = c_cases['New Cases'] - c_cases['Death Cases'] - c_cases['Cured Cases']
c_cases["Death Rate (per 100)"] = np.round(100 * c_cases["Death Cases"] / c_cases["New Cases"], 2)
c_cases["Cure Rate (per 100)"] = np.round(100 * c_cases["Cured Cases"] / c_cases["New Cases"], 2)

# Styled table: 'New Cases' uses the coolwarm palette, every other numeric
# column uses Blues. c_cases is already sorted, so no second sort is needed.
c_cases_styled = c_cases.fillna(0).style.background_gradient(cmap='coolwarm', subset=["New Cases"])
for blue_column in ["Death Cases", "Cured Cases", "Active Cases",
                    "Death Rate (per 100)", "Cure Rate (per 100)"]:
    c_cases_styled = c_cases_styled.background_gradient(cmap='Blues', subset=[blue_column])
c_cases_styled
# For Karachi: forecast the cumulative confirmed cases 30 days ahead with Prophet.
khi = cities.loc[cities['City'] == 'Karachi']
confirmed = khi['Total Confirmed Cases'].values.tolist()
# Prophet is fitted on a frame with a 'ds' (date) and a 'y' (value) column.
data = pd.DataFrame({'ds': list(khi['Date']), 'y': confirmed})
prop = Prophet()
prop.fit(data)
future = prop.make_future_dataframe(periods=30)
prop_forecast = prop.predict(future)
forecast = prop_forecast[['ds', 'yhat']].tail(30)

fig = go.Figure()
fig.add_trace(go.Scatter(x=khi['Date'], y=khi['Total Confirmed Cases'],
                         mode='lines+markers', marker_color='green', name='Actual'))
# Plot the point forecast (yhat), the series kept in `forecast`; yhat_upper
# is only the upper edge of the uncertainty interval.
fig.add_trace(go.Scatter(x=prop_forecast['ds'], y=prop_forecast['yhat'],
                         mode='lines+markers', marker_color='Orange', name='Predicted'))
fig.update_layout(title_text='Karachi Confirmed Cases (Predicted vs Actual) using Prophet',
                  plot_bgcolor='rgb(275, 270, 273)', width=600, height=600)
fig.show()
# For Karachi: forecast the cumulative deaths 30 days ahead with Prophet.
# NOTE: the variable is still called `confirmed` although it holds deaths;
# the name is kept because it is a module-level variable.
confirmed = khi['Total Deaths'].values.tolist()
# Prophet is fitted on a frame with a 'ds' (date) and a 'y' (value) column.
data = pd.DataFrame({'ds': list(khi['Date']), 'y': confirmed})
prop = Prophet()
prop.fit(data)
future = prop.make_future_dataframe(periods=30)
prop_forecast = prop.predict(future)
forecast = prop_forecast[['ds', 'yhat']].tail(30)

fig = go.Figure()
fig.add_trace(go.Scatter(x=khi['Date'], y=khi['Total Deaths'],
                         mode='lines+markers', marker_color='green', name='Actual'))
# Plot the point forecast (yhat), the series kept in `forecast`; yhat_upper
# is only the upper edge of the uncertainty interval.
fig.add_trace(go.Scatter(x=prop_forecast['ds'], y=prop_forecast['yhat'],
                         mode='lines+markers', marker_color='red', name='Predicted'))
fig.update_layout(title_text='Karachi Death Cases (Predicted vs Actual) using Prophet',
                  plot_bgcolor='rgb(275, 270, 273)', width=600, height=600)
fig.show()
# For Karachi: forecast the cumulative recoveries 30 days ahead with Prophet.
# NOTE: the variable is still called `confirmed` although it holds
# recoveries; the name is kept because it is a module-level variable.
confirmed = khi['Total Recovered'].values.tolist()
# Prophet is fitted on a frame with a 'ds' (date) and a 'y' (value) column.
data = pd.DataFrame({'ds': list(khi['Date']), 'y': confirmed})
prop = Prophet()
prop.fit(data)
future = prop.make_future_dataframe(periods=30)
prop_forecast = prop.predict(future)
forecast = prop_forecast[['ds', 'yhat']].tail(30)

fig = go.Figure()
fig.add_trace(go.Scatter(x=khi['Date'], y=khi['Total Recovered'],
                         mode='lines+markers', marker_color='green', name='Actual'))
# Plot the point forecast (yhat), the series kept in `forecast`; yhat_upper
# is only the upper edge of the uncertainty interval.
fig.add_trace(go.Scatter(x=prop_forecast['ds'], y=prop_forecast['yhat'],
                         mode='lines+markers', marker_color='yellow', name='Predicted'))
fig.update_layout(title_text='Karachi Recovered Cases (Predicted vs Actual) using Prophet',
                  plot_bgcolor='rgb(275, 270, 273)', width=600, height=600)
fig.show()